/*******************************************************************************
Grid_Population_and_Employment.do

This file collects information on census block population in 2010 and employment
in 2011. Additionally, it creates shapefiles of 2010 census blocks and obtains 
the coordinates of each census block centroid, which are used to map census blocks 
to squares in the grid of the continential US using the geoinpoly command. It 
then totals employment and population in each grid cell, and calculates 
population and employment densities. Employment is calculated at the block level 
using LODES data for the year 2011. Population is calculated at the block level 
using the 2010 decennial census. 

Last updated: 4/8/2019
*******************************************************************************/

version 15
cd "C:\Plants_in_Space"
set more off
set type double
set varabbrev off

**************************MAKE CENSUS BLOCK SHAPEFILES**************************
// Here we make shapefiles for each census block for 2010. Note that
// census blocks are not always permanent through the decade and may be split
// when a change in geographic boundaries occurs (i.e. a new county is formed).
// However, when a census block is split it has a letter suffix at the end of its
// name, so we can cut the suffixes and combine blocks together to stay 
// consistent with 2010 census block definitions.

******2010 census block coordinates
// Import 2010 census block shapefile for each state and save as a Stata 
// shapefile. Shapefiles are downloaded from:
// https://www.census.gov/cgi-bin/geo/shapefiles/index.php?year=2010&layergroup=Blocks
use "Shapefiles\Intermediate\2018_state.dta", clear // List of 48 states (plus DC) we need shapefiles for
levelsof STATEFP, local(FIPS)
foreach state of local FIPS{
	shp2dta using "Shapefiles\Raw\census_block_2010_shapefiles\tl_2010_`state'_tabblock10.shp", ///
		database("Shapefiles\Raw\census_block_2010_shapefiles\2010_state_`state'_block_database.dta") ///
		coordinates("Shapefiles\Raw\census_block_2010_shapefiles\2010_state_`state'_block_coordinates.dta") ///
		replace
}

// Clean up 2010 census block database to just get a list of each block's
// latitude and longitude coordinates--we'll merge on this later
levelsof STATEFP, local(FIPS)
foreach state of local FIPS{
	use "Shapefiles\Raw\census_block_2010_shapefiles\2010_state_`state'_block_database.dta"
	destring GEOID10 INTPTLAT10 INTPTLON10, replace
	keep GEOID10 INTPTLAT10 INTPTLON10
	ren GEOID10 census_block
	label variable census_block "15-digit census block FIPS ID"
	ren INTPTLAT10 block_latitude
	label variable block_latitude "Census block's centroid latitude"
	ren INTPTLON10 block_longitude 
	label variable block_longitude "Census block's centroid longitude"
	save "Shapefiles\Intermediate\census_block_2010_shapefiles\2010_state_`state'_block_centroids.dta", replace
}

// These individual shapefiles are too big to append all together. Instead, we
// later do a merge on each shapefile.

****************************LODES EMPLOYMENT DATA*******************************
// LODES WAC files are downloaded from: https://lehd.ces.census.gov/data/lodes/LODES7/.
// Each state-year file has one observation per census block and a variable 
// corresponding to that census block's employment. Census blocks with no 
// employment are not listed (most census blocks have no employment). The LODES 
// files are at the 2010 census block level.

// Import each state's files for 2011 and save as Stata file. Census block
// ID numbers are 15 digits and thus need to be read in as doubles.
foreach state in al ar az ca co ct dc de fl ga ia id il in ks ky la ma md me ///
	mi mn mo ms mt nc nd ne nh nj nm nv ny oh ok or pa ri sc sd tn tx ut ///
	va vt wa wi wv wy {
		 ******Workplace Area Characteristics (WAC) Data
		 // This contains the number of workers in each census block.
		 import delimited "Data\Raw\LODES\\`state'_wac_S000_JT00_2011.csv", asdouble clear
		 keep w_geocode c000 // Census block ID and total number of workers in census block
		 ren w_geocode census_block
		 label variable census_block "15-digit census block FIPS ID"
		 ren c000 emp_in_block_2011
		 label variable emp_in_block_2011 "Total employment in census block in 2011"
		 save "Data\Intermediate\LODES\\`state'_census_block_emp_2011.dta", replace
}

// Append all files together
use "Data\Intermediate\LODES\al_census_block_emp_2011.dta", replace
foreach state in ar az ca co ct dc de fl ga ia id il in ks ky la ma md me ///
	mi mn mo ms mt nc nd ne nh nj nm nv ny oh ok or pa ri sc sd tn tx ut ///
	va vt wa wi wv wy{
	 append using "Data\Intermediate\LODES\\`state'_census_block_emp_2011.dta"
}
save "Data\Intermediate\LODES\all_census_blocks_emp_2011.dta", replace

************************CENSUS DATA ON POPULATION*******************************
// Here we use 2010 census data on population in each census block. Data is 
// downloaded from the NHGIS, series "Total Population" from dataset "2010_SF1a".
// This has data on all census blocks, including those with no population.

// Import file with data on each block. Note that the file name contains an 
// "extract number" (here, 0016). Separate NHGIS downloads may require the file
// to be changed, even if the data series is the same.
import delimited "Data\Raw\NHGIS\nhgis0016_ds172_2010_block.csv", asdouble clear

// Generate census block IDs
destring statea countya tracta blocka, replace
gen double census_block=statea*1e13+countya*1e10+tracta*1e4+blocka
format census_block %15.0f

// Keep only variables for population and census block ID and save
keep census_block h7v001
ren h7v001 population_in_block_2010
label variable census_block "15-digit census block FIPS ID"
label variable population_in_block_2010 "Total population in census block in 2010"
order census_block population_in_block_2010
save "Data\Intermediate\NHGIS\all_census_blocks_population_2010.dta", replace

**************************MERGE FILES TOGETHER**********************************
// First, merge together data on population and employment. Then, we merge to 
// get data on each census block's latitude-longitude coordinates.
use "Data\Intermediate\NHGIS\all_census_blocks_population_2010.dta", clear
merge 1:1 census_block using "Data/Intermediate/LODES/all_census_blocks_emp_2011.dta", keepusing(emp_in_block_2011) assert(1 3)
// Census blocks with "missing" employment have zero employees
replace emp_in_block_2011=0 if _merge==1
drop _merge

// This step is a little tedious--merge with each state's database file containing
// its latitude longitude coordinates
preserve
	use "Shapefiles\Intermediate\2018_state.dta", clear // List of 48 states (plus DC) we need shapefiles for
	levelsof STATEFP, local(FIPS)
restore
foreach state of local FIPS{
	merge 1:1 census_block using "Shapefiles\Intermediate\census_block_2010_shapefiles\2010_state_`state'_block_centroids.dta", update keepusing(block_latitude block_longitude) assert(1 3 4) nogen
}
save "Data\Intermediate\Census_block_coordinates_employment_population.dta", replace

*********************AGGREGATING DATA AT THE GRID LEVEL*************************
// First, for different values of M we use the geoinpoly command to assign census
// blocks to squares based on centroid locations (census blocks are small enough
// that most are entirely contained within a square). We then total population 
// and employment across blocks in each square.

use "Data\Intermediate\Census_block_coordinates_employment_population.dta", clear
// Now, use the geoinpoly command to assign each census block to a grid cell
// for our various values of M.
foreach m of numlist 3 6 12 24 48 {
	geoinpoly block_latitude block_longitude using "Shapefiles\Final\US_`m'_mile_square_cells_latitude_longitude.dta", unique
	ren _ID ID_`m'
	label variable ID_`m' "ID code for `m' by `m' mile square in grid" 
	di "`m' completed"
}
save "Data\Intermediate\Census_block_coordinates_employment_population_and_grid_IDs.dta", replace

// For each value of M, compute total population and employment, and save a file 
// with the values for each size-M square. 
use "Data\Intermediate\Census_block_coordinates_employment_population_and_grid_IDs.dta", clear

// For each M x M grid, calculate total employment and population, then save
// a list of grid cells
foreach m of numlist 3 6 12 24 48 {
	preserve
		keep census_block population_in_block_2010 emp_in_block_2011 ID_`m'
		drop if missing(ID_`m')
		
		// Population and employment totals
		by ID_`m', sort: egen total_pop = total(population_in_block_2010)
		by ID_`m', sort: egen total_emp = total(emp_in_block_2011)
		drop census_block population_in_block_2010 emp_in_block_2011

		// Keep one observation per size-M square		
		by ID_`m', sort: keep if _n == 1
		
		// Merge to get latitude-longitude data for squares
		ren ID_`m' spgrid_id
		merge 1:1 spgrid_id using "Shapefiles\Final\US_`m'_mile_square_points_latitude_longitude.dta", assert(2 3) nogen
		ren spgrid_xcoord grid_longitude
		ren spgrid_ycoord grid_latitude
		ren spgrid_id ID_`m'

		// Calculate population and employment density per square
		foreach measure in pop emp {
			replace total_`measure' = 0 if missing(total_`measure')
			gen `measure'_density_`m' = total_`measure' / `m'^2
		}

		// Drop block-level variables
		keep ID_`m' total_pop total_emp pop_density_`m' emp_density_`m' grid_latitude grid_longitude
		save "Data\Intermediate\grid_population_and_employment_data\population_and_employment_data_M`m'.dta", replace
	restore
}

